#!/bin/bash

# Usage message

#######################################
# Print the help text.
# Globals:   none ($0 is read for the example command line)
# Arguments: none
# Outputs:   the usage text on stdout
#######################################
usage() {
  # The heredoc delimiter is left unquoted so that $0 is expanded; the text
  # contains no other expansion. A heredoc is never word-split or globbed, so
  # "[txt]" and "[fasta]" are printed literally whatever files exist in the
  # current directory.
  cat <<EOF

---USAGE---

    Description:

This script makes a biom file using a mapping file and a folder containing tax files.

    Required inputs

-m mapping file containing metadata [txt]
-t folder containing tax files

    Output:

An OTU database
An OTU database [fasta]
A tax database
A biom file

Example of usage:
$0 -m map_Mocks.txt -t tax_files

output:
map_Mocks_otu_database
map_Mocks_otu_database.fa
map_Mocks_tax_database
map_Mocks.biom

--- End of USAGE ---

EOF
}

# Help option, check number of arguments.
# "-m <file> -t <folder>" is exactly four words; anything else is rejected
# before option parsing starts.

if [[ "${1:-}" == "-h" ]]; then
  usage
  exit 0
elif (( $# != 4 )); then
  # Diagnostics go to stderr so they never mix with regular output.
  printf 'ERROR: Invalid number of args \nTry %s -h for help\n' "$0" >&2
  exit 1
fi

# Assign arguments to variables, check for invalid arguments.

#######################################
# Parse the command-line options of the script.
# Globals:   map (written)              - value given to -m
#            folder_tax_files (written) - value given to -t
# Arguments: the option words to parse (the script passes "$@")
# Outputs:   an error message on stderr for an unknown option or a missing
#            option argument (getopts prints its own diagnostic as well)
# Returns:   0 when every option was accepted; exits the script with
#            status 1 otherwise
#######################################
parse_args() {
  # OPTIND is local so that each call starts scanning at its first argument.
  local opt OPTARG OPTIND=1

  while getopts 'm:t:' opt; do
    case "$opt" in
      m)
        map=$OPTARG
        ;;
      t)
        folder_tax_files=$OPTARG
        ;;
      *)
        # getopts sets opt to "?" for an unknown option or a missing argument.
        printf '\nERROR: invalid arguments used\n\n' >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"

# Check that the mapping file and the tax folder are present.

# Validate both inputs before anything is written: the mapping file must be a
# non-empty file (-s) and the tax folder must be a directory (-d).
# Diagnostics go to stderr; every failure ends the script with status 1.
if [[ ! -s "$map" ]]; then
  printf '\nERROR: %s not present or empty\n\n' "$map" >&2
  exit 1
elif [[ ! -d "$folder_tax_files" ]]; then
  printf '\nERROR: %s not present or not a directory\n\n' "$folder_tax_files" >&2
  exit 1
fi

# The databases are built from "<folder>/*_tax_file". An unmatched glob is
# left as the literal pattern, so testing whether the first expansion exists
# tells whether the folder holds at least one tax file.
tax_files=("$folder_tax_files"/*_tax_file)
if [[ ! -e "${tax_files[0]}" ]]; then
  printf '\nERROR: no *_tax_file found in %s\n\n' "$folder_tax_files" >&2
  exit 1
fi

# Scratch directories for the per-sample OTU tables and biom files; they are
# created in the current working directory.
mkdir -p otu_tables_files biom_files

# Prefix of every output file: the mapping file path without its ".txt"
# extension. Only a trailing ".txt" is stripped, so a ".txt" occurring
# elsewhere in the path is left untouched.
map_name=${map%.txt}

# Remove the databases of a previous run that used the same prefix.
rm -f -- "${map_name}_otu_database" "${map_name}_tax_database"

#######################################
# Build the OTU database and the taxonomy database from all the tax files
# of a folder.
#
# In a tax file, a line whose first field starts with ">" is a header whose
# second field is kept as the taxonomy; every other line contributes its
# first field (the sequence) paired with the taxonomy of the last header.
# Distinct "sequence<TAB>taxonomy" pairs are numbered in sorted order and the
# first five ";__" of each line become ";p__", ";c__", ";o__", ";f__", ";g__".
#
# Arguments: $1 - folder holding the *_tax_file files
#            $2 - output prefix
# Outputs:   <prefix>_otu_database : sequence <TAB> OTU number
#            <prefix>_tax_database : OTU number <TAB> "k__" + taxonomy
#######################################
build_databases() {
  local tax_dir=$1 prefix=$2

  # LC_ALL=C makes the sort order, and therefore the OTU numbering,
  # independent of the locale of the caller. A single sed applies the five
  # substitutions in order on each line.
  cat -- "$tax_dir"/*_tax_file |
    awk '{
      if (substr($1, 1, 1) == ">") {
        tax = $2
      } else {
        print $1 "\t" tax
      }
    }' |
    LC_ALL=C sort -u |
    awk '{ print $0 "\t" NR }' |
    sed -e 's/;__/;p__/' \
        -e 's/;__/;c__/' \
        -e 's/;__/;o__/' \
        -e 's/;__/;f__/' \
        -e 's/;__/;g__/' |
    awk -v otu_db="${prefix}_otu_database" \
        -v tax_db="${prefix}_tax_database" '{
      print $1 "\t" $3 > otu_db
      print $3 "\tk__" $2 > tax_db
    }'
}

build_databases "$folder_tax_files" "$map_name"


#######################################
# Print the OTU map of one sample.
#
# Every sequence line of the tax file yields one output line: the OTU number
# of the sequence followed by one read identifier per read. The header that
# precedes the sequence is split on "_"; counting the OTU number as field 1,
# field 4 is used as the read-name prefix and field 7 as the number of reads.
# NOTE(review): this presumably matches headers shaped like
# ">a_b_<sample>_d_e_<count>" - confirm against the tax file format.
# Read identifiers are numbered consecutively across the whole file.
#
# Arguments: $1 - OTU database (sequence <TAB> OTU number)
#            $2 - tax file of the sample
# Outputs:   the OTU map on stdout
#######################################
sample_otu_map() {
  local otu_db=$1 tax_file=$2

  awk '
    # Counts the input files, so an empty first file is still file 1.
    FNR == 1 { file_index++ }

    # First file: remember the OTU number of every sequence.
    file_index == 1 { otu_number[$1] = $2 }

    # Second file: pair each sequence with the header seen before it.
    file_index == 2 {
      if (substr($1, 1, 1) == ">") {
        header = $1
      } else {
        print otu_number[$1] "\t" header
      }
    }
  ' "$otu_db" "$tax_file" |
    tr '_' '\t' |
    awk '
      BEGIN { next_read = 1 }
      {
        row = $1
        for (i = next_read; i < next_read + $7; i++) {
          row = row "\t" $4 "_" i
        }
        print row
        next_read = i
      }
    '
}

# One biom table per sample named in the first column of the mapping file
# (the first line is skipped). The list is read on file descriptor 3 so that
# nothing inside the loop can consume it, and awk gets /dev/null as stdin so
# it can never wait on the terminal when no mapping file is given.
while IFS= read -r sample <&3; do
  sample_otu_map "${map_name}_otu_database" \
    "${folder_tax_files}/${sample}_tax_file" \
    > "otu_tables_files/${sample}_otu_table"

  if ! make_otu_table.py \
    -i "otu_tables_files/${sample}_otu_table" \
    -t "${map_name}_tax_database" \
    -o "biom_files/${sample}.biom"; then
    # Stop here: the merge step needs the biom file of every sample.
    printf 'ERROR: make_otu_table.py failed for sample %s\n' "$sample" >&2
    exit 1
  fi
done 3< <(awk 'NR > 1 && $1 != "" { print $1 }' "$map" < /dev/null)

#######################################
# Convert an OTU database to FASTA.
# Arguments: $1 - OTU database, one "sequence <TAB> OTU number" line per OTU
# Outputs:   on stdout, for every input line, ">" followed by the second
#            field, then the first field on the next line
#######################################
otu_database_to_fasta() {
  awk '{ print ">" $2; print $1 }' "$1"
}

otu_database_to_fasta "${map_name}_otu_database" > "${map_name}_otu_database.fa"

#######################################
# Print the comma-separated list of the per-sample biom files named by a
# mapping file.
# Arguments: $1 - mapping file; the first line is skipped and lines whose
#                 first field is empty are ignored
# Outputs:   "biom_files/<sample>.biom" entries joined by "," on stdout,
#            without a trailing newline; nothing when no sample is listed
#######################################
biom_tables_from_map() {
  # The separator is empty before the first entry, so the list never starts
  # with a comma, even when the second line of the file is blank.
  # stdin is /dev/null so awk cannot wait on the terminal if $1 is empty.
  awk 'NR > 1 && $1 != "" {
    printf "%sbiom_files/%s.biom", sep, $1
    sep = ","
  }' "$1" < /dev/null
}

tables_to_merge=$(biom_tables_from_map "$map")

# map_name still holds the output prefix computed before the databases were
# built, so the merged table is written next to them.
merge_otu_tables.py -i "$tables_to_merge" -o "${map_name}.biom"

# Remove the two scratch directories, one after the other.
for scratch_dir in otu_tables_files biom_files; do
  rm -r $scratch_dir
done
